/*
* Copyright (c) 2020-2022, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//!
//! \file     encode_vp9_segmentation.cpp
//! \brief    Defines the common interface for vp9 encode segmentation features
//!

#include "encode_vp9_segmentation.h"
#include "encode_vp9_vdenc_const_settings.h"
#include "media_vp9_packet_defs.h"
#include "mhw_utilities.h"
#include "encode_vp9_tile.h"
#include "media_vp9_feature_defs.h"

namespace encode
{
Vp9Segmentation::Vp9Segmentation(
    MediaFeatureManager *featureManager,
    EncodeAllocator *    allocator,
    CodechalHwInterfaceNext *hwInterface,
    void *               constSettings)
    : MediaFeature(constSettings),
      m_allocator(allocator)
{
    ENCODE_FUNC_CALL();

    // The *_NO_STATUS_RETURN checks are expected to leave the constructor on a null
    // pointer, so the members assigned after a failing check stay untouched.
    ENCODE_CHK_NULL_NO_STATUS_RETURN(featureManager);
    m_featureManager = featureManager;

    // Look up the basic feature through the manager; it has to be a Vp9BasicFeature.
    auto registeredFeature = featureManager->GetFeature(FeatureIDs::basicFeature);
    m_basicFeature         = dynamic_cast<Vp9BasicFeature *>(registeredFeature);
    ENCODE_CHK_NULL_NO_STATUS_RETURN(m_basicFeature);

    ENCODE_CHK_NULL_NO_STATUS_RETURN(hwInterface);
    m_hwInterface = hwInterface;
}

Vp9Segmentation::~Vp9Segmentation()
{
    // Release what FreeResources() owns. A destructor has no way to report the
    // returned status, so it is discarded explicitly.
    static_cast<void>(FreeResources());
}

//!
//! \brief    Initialize the segmentation feature
//! \param    [in] settings
//!           Codec settings pointer; it is only validated for null here
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS Vp9Segmentation::Init(void *settings)
{
    ENCODE_FUNC_CALL();
    MEDIA_CHK_NULL_RETURN(settings);

    // Nothing is read from the settings object in this function, so no typed
    // view of it is created; only the feature's resources are set up.
    ENCODE_CHK_STATUS_RETURN(AllocateResources());

    return MOS_STATUS_SUCCESS;
}

//!
//! \brief    Refresh the per-frame segmentation state from the encoder parameters
//! \details  Caches the segment parameters, derives whether an application
//!           segment map is in use, forces a segment map update when segmentation
//!           is enabled without an application map, disables MBBRC, and finally
//!           prepares the VDEnc stream-in surface.
//! \param    [in] params
//!           Pointer to EncoderParams
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success.
//!           MOS_STATUS_INVALID_PARAMETER when segmentation is enabled without a
//!           segment map in CQP mode, or when the frame rate denominator is zero.
//!           Other failure codes are propagated from the checks and helpers.
//!
MOS_STATUS Vp9Segmentation::Update(void *params)
{
    ENCODE_FUNC_CALL();
    ENCODE_CHK_NULL_RETURN(params);
    // The constructor can return before m_basicFeature is assigned, so validate it before use
    ENCODE_CHK_NULL_RETURN(m_basicFeature);
    auto allocator = m_basicFeature->GetAllocator();
    ENCODE_CHK_NULL_RETURN(allocator);

    EncoderParams *encodeParams = static_cast<EncoderParams *>(params);

    auto vp9SeqParams = static_cast<PCODEC_VP9_ENCODE_SEQUENCE_PARAMS>(encodeParams->pSeqParams);
    ENCODE_CHK_NULL_RETURN(vp9SeqParams);
    auto vp9PicParams = static_cast<PCODEC_VP9_ENCODE_PIC_PARAMS>(encodeParams->pPicParams);
    ENCODE_CHK_NULL_RETURN(vp9PicParams);
    m_vp9SegmentParams = static_cast<PCODEC_VP9_ENCODE_SEGMENT_PARAMS>(encodeParams->pSegmentParams);
    ENCODE_CHK_NULL_RETURN(m_vp9SegmentParams);

    m_enabled = vp9PicParams->PicFlags.fields.segmentation_enabled;

    m_segmentMapProvided = encodeParams->bSegmentMapProvided && vp9PicParams->PicFlags.fields.segmentation_enabled;
    // In MBBRC case, without a SegMap provided by the app, we need to set SegMapUpdate ON
    // as the SegMap is generated by HuC and it can be different for every frame
    if (vp9PicParams->PicFlags.fields.segmentation_enabled && !encodeParams->bSegmentMapProvided)
    {
        vp9PicParams->PicFlags.fields.segmentation_update_map = 1;
    }

    m_mbBrcEnabled                      = false;
    vp9SeqParams->SeqFlags.fields.MBBRC = MBBRC_DISABLED;

    // We do not support segmentation w/o seg map in CQP case, only support segmentation w/ seg map in CQP
    // BRC/ACQP supports segmentation both w/ and w/o seg map
    if (vp9PicParams->PicFlags.fields.segmentation_enabled && !encodeParams->bSegmentMapProvided && vp9SeqParams->RateControlMethod == RATECONTROL_CQP)
    {
        return MOS_STATUS_INVALID_PARAMETER;
    }

    // Only validation is done on the frame rate here; the value itself is not consumed.
    // Need to index properly when more than one temporal layer is present
    ENCODE_ASSERT(vp9SeqParams->FrameRate[0].uiDenominator > 0);
    if (vp9SeqParams->FrameRate[0].uiDenominator == 0)
    {
        return MOS_STATUS_INVALID_PARAMETER;
    }

    if (!m_mbBrcEnabled)
    {
        m_mbStatsEnabled = false;
        if (m_segmentMapProvided)
        {
            // The application surface pointer is copied by value below, so it must be valid
            ENCODE_CHK_NULL_RETURN(encodeParams->psMbSegmentMapSurface);
            m_mbSegmentMapSurface = *(encodeParams->psMbSegmentMapSurface);
            ENCODE_CHK_STATUS_RETURN(allocator->GetSurfaceInfo(&(m_mbSegmentMapSurface)));
        }
    }
    else
    {
        // NOTE(review): m_mbBrcEnabled is forced to false above, so this branch is
        // currently not reachable; it is kept for when MBBRC gets re-enabled.
        // Kernel C model fixed Qindex delta's when MBBRC is enabled
        int16_t segmentQIndexDelta[CODEC_VP9_MAX_SEGMENTS] = {0, -8, -6, -4, -2, 2, 4, 6};

        for (uint8_t i = 0; i < CODEC_VP9_MAX_SEGMENTS; i++)
        {
            m_vp9SegmentParams->SegData[i].SegmentFlags.value  = 0;
            m_vp9SegmentParams->SegData[i].SegmentLFLevelDelta = 0;
            m_vp9SegmentParams->SegData[i].SegmentQIndexDelta  = segmentQIndexDelta[i];
        }
        m_mbStatsEnabled = true;
    }

    if (m_basicFeature->m_newSeq)
    {
        ENCODE_CHK_STATUS_RETURN(SetConstSettings());
        ENCODE_CHK_STATUS_RETURN(SetSequenceStructs());
    }

    ENCODE_CHK_STATUS_RETURN(SetupSegmentationStreamIn());

    return MOS_STATUS_SUCCESS;
}

MOS_STATUS Vp9Segmentation::SetDmemForUpdate(void *params)
{
    ENCODE_FUNC_CALL();
    ENCODE_CHK_NULL_RETURN(params);

    // The caller hands over the BRC update DMEM as an untyped pointer
    HucBrcUpdateDmem *dmem = static_cast<HucBrcUpdateDmem *>(params);
    ENCODE_CHK_NULL_RETURN(dmem);
    auto vp9PicParams = static_cast<PCODEC_VP9_ENCODE_PIC_PARAMS>(m_basicFeature->m_vp9PicParams);
    ENCODE_CHK_NULL_RETURN(vp9PicParams);

    // With an application segment map the provided QP deltas are honored. Without one,
    // and with segmentation enabled, BRC generates the QP deltas and patches them into
    // the segment states, which is what this flag requests.
    const bool segMapGenerating =
        vp9PicParams->PicFlags.fields.segmentation_enabled && !m_segmentMapProvided;
    dmem->UPD_SegMapGenerating_U8 = segMapGenerating;

    return MOS_STATUS_SUCCESS;
}

MOS_STATUS Vp9Segmentation::SetDmemForHucProb(void *params)
{
    ENCODE_FUNC_CALL();
    ENCODE_CHK_NULL_RETURN(params);

    // The caller hands over the HuC probability DMEM as an untyped pointer
    HucProbDmem *dmem = static_cast<HucProbDmem *>(params);
    ENCODE_CHK_NULL_RETURN(dmem);
    auto vp9PicParams = static_cast<PCODEC_VP9_ENCODE_PIC_PARAMS>(m_basicFeature->m_vp9PicParams);
    ENCODE_CHK_NULL_RETURN(vp9PicParams);

    // Mirror the picture-level segmentation flags into the frame control block
    auto &frameCtrl         = dmem->FrameCtrl;
    frameCtrl.SegOn         = vp9PicParams->PicFlags.fields.segmentation_enabled;
    frameCtrl.SegMapUpdate  = vp9PicParams->PicFlags.fields.segmentation_update_map;
    frameCtrl.SegUpdateData = vp9PicParams->PicFlags.fields.seg_update_data;

    // Both stream-in switches follow the presence of an application segment map
    const uint8_t segMapStreamIn = static_cast<uint8_t>(m_segmentMapProvided);
    dmem->StreamInSegEnable      = segMapStreamIn;
    dmem->StreamInEnable         = segMapStreamIn;  // Currently unused, if used may || with HME enabled

    return MOS_STATUS_SUCCESS;
}

//!
//! \brief    Release the zig-zag to raster map buffer owned by this feature
//! \details  Safe to call more than once: the pointer is cleared after the free,
//!           so a later call (for example from the destructor) does nothing.
//! \return   MOS_STATUS
//!           Always MOS_STATUS_SUCCESS
//!
MOS_STATUS Vp9Segmentation::FreeResources()
{
    ENCODE_FUNC_CALL();

    if (m_mapBuffer)
    {
        MOS_FreeMemory(m_mapBuffer);
        // Clear the pointer so the buffer is neither freed twice nor used after free
        m_mapBuffer = nullptr;
    }

    return MOS_STATUS_SUCCESS;
}

MOS_STATUS Vp9Segmentation::SetSequenceStructs()
{
    ENCODE_FUNC_CALL();

    auto vp9SeqParams = static_cast<PCODEC_VP9_ENCODE_SEQUENCE_PARAMS>(m_basicFeature->m_vp9SeqParams);
    ENCODE_CHK_NULL_RETURN(vp9SeqParams);

    const bool mbBrcRequested =
        (vp9SeqParams->SeqFlags.fields.MBBRC == MBBRC_ENABLED) ||
        (vp9SeqParams->SeqFlags.fields.MBBRC == MBBRC_ENABLED_TU_DEPENDENCY);

    if (mbBrcRequested)
    {
        // MBBRC needs a driver-owned segment map; allocate it once and keep it
        if (!m_segmentMapAllocated)
        {
            ENCODE_CHK_STATUS_RETURN(AllocateMbBrcSegMapSurface());
        }
        return MOS_STATUS_SUCCESS;
    }

    // MBBRC is disabled: a driver-allocated MBBRC segment map has to be released.
    // The same surface (m_mbSegmentMapSurface) references either the segment map
    // passed by the application or the driver-allocated one, depending on whether
    // MBBRC is disabled or enabled.
    if (!Mos_ResourceIsNull(&m_mbSegmentMapSurface.OsResource) && m_segmentMapAllocated)
    {
        m_allocator->DestroyResource(&m_mbSegmentMapSurface.OsResource);
    }
    m_segmentMapAllocated = false;

    return MOS_STATUS_SUCCESS;
}

//!
//! \brief    Allocate the driver-owned segment map surface used with MBBRC
//! \details  Describes m_mbSegmentMapSurface as a linear 2D buffer sized from the
//!           maximum picture dimensions in macroblocks, allocates the backing
//!           resource and marks the map as driver-allocated.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS Vp9Segmentation::AllocateMbBrcSegMapSurface()
{
    ENCODE_FUNC_CALL();

    // Both are dereferenced below and neither is guaranteed by the constructor
    ENCODE_CHK_NULL_RETURN(m_basicFeature);
    ENCODE_CHK_NULL_RETURN(m_allocator);

    // MBBRC segment map surface needs to be allocated when mbbrc is enabled as segment map will not be
    // passed from APP when MBBRC is enabled
    uint32_t picWidthInMb  = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_basicFeature->m_maxPicWidth);
    uint32_t picHeightInMb = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_basicFeature->m_maxPicHeight);

    m_mbSegmentMapSurface.TileType      = MOS_TILE_LINEAR;
    m_mbSegmentMapSurface.bArraySpacing = true;
    m_mbSegmentMapSurface.Format        = Format_Buffer_2D;
    m_mbSegmentMapSurface.dwWidth       = MOS_ALIGN_CEIL(picWidthInMb, 4);
    m_mbSegmentMapSurface.dwHeight      = picHeightInMb;
    m_mbSegmentMapSurface.dwPitch       = MOS_ALIGN_CEIL(picWidthInMb, 64);

    MOS_ALLOC_GFXRES_PARAMS allocParamsForBuffer2D;
    MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS));

    // The allocation width is the surface pitch, not the 4-aligned surface width
    allocParamsForBuffer2D.Type     = MOS_GFXRES_2D;
    allocParamsForBuffer2D.TileType = MOS_TILE_LINEAR;
    allocParamsForBuffer2D.Format   = Format_Buffer_2D;
    allocParamsForBuffer2D.dwWidth  = m_mbSegmentMapSurface.dwPitch;
    allocParamsForBuffer2D.dwHeight = picHeightInMb;
    allocParamsForBuffer2D.pBufName = "MBBRC driver Segment Map Surface";
    allocParamsForBuffer2D.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_NOCACHE;

    MOS_RESOURCE *allocatedBuffer = m_allocator->AllocateResource(allocParamsForBuffer2D, true);
    ENCODE_CHK_NULL_RETURN(allocatedBuffer);
    m_mbSegmentMapSurface.OsResource = *allocatedBuffer;

    m_segmentMapAllocated = true;

    return MOS_STATUS_SUCCESS;
}

//!
//! \brief    Prepare the VDEnc stream-in surface for the current frame
//! \details  When neither an application segment map nor HME is in use, nothing is
//!           done. Otherwise the recycled stream-in buffer is locked and zeroed.
//!           If an application segment map is provided, the map is locked as well,
//!           the zig-zag to raster lookup table is (re)built per tile, and every
//!           32x32 stream-in entry receives its segment id and the target-usage
//!           dependent settings. Both locks are released on every exit path.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success.
//!           MOS_STATUS_INVALID_PARAMETER for an invalid target usage.
//!           Other failure codes are propagated from the checks and helpers.
//!
MOS_STATUS Vp9Segmentation::SetupSegmentationStreamIn()
{
    ENCODE_FUNC_CALL();

    // Validate before the first dereference; the constructor may leave this null
    ENCODE_CHK_NULL_RETURN(m_basicFeature);

    if (!m_segmentMapProvided && !m_basicFeature->m_hmeEnabled)
    {
        // If we're not going to use the streamin surface leave now
        return MOS_STATUS_SUCCESS;
    }

    ENCODE_CHK_NULL_RETURN(m_hwInterface);
    PMOS_INTERFACE osInterface = m_hwInterface->GetOsInterface();
    ENCODE_CHK_NULL_RETURN(osInterface);
    ENCODE_CHK_NULL_RETURN(m_allocator);
    auto basicFeature = static_cast<Vp9BasicFeature *>(m_basicFeature);
    ENCODE_CHK_NULL_RETURN(basicFeature);
    ENCODE_CHK_NULL_RETURN(basicFeature->m_recycleBuf);

    auto vp9PicParams = static_cast<PCODEC_VP9_ENCODE_PIC_PARAMS>(m_basicFeature->m_vp9PicParams);
    ENCODE_CHK_NULL_RETURN(vp9PicParams);
    auto vp9SeqParams = static_cast<PCODEC_VP9_ENCODE_SEQUENCE_PARAMS>(m_basicFeature->m_vp9SeqParams);
    ENCODE_CHK_NULL_RETURN(vp9SeqParams);

    auto vdencStreamInBuffer = m_basicFeature->m_recycleBuf->GetBuffer(RecycleResId::StreamInBuffer, m_basicFeature->m_currRecycledBufIdx);
    ENCODE_CHK_NULL_RETURN(vdencStreamInBuffer);
    auto streamIn = static_cast<Vp9VdencStreamInState *>(m_allocator->LockResourceForWrite(vdencStreamInBuffer));
    ENCODE_CHK_NULL_RETURN(streamIn);

    // Align to cache line size is OK since streamin state is padded to cacheline size
    // - HW uses cacheline size to read, not command size
    uint32_t blockWidth   = MOS_ALIGN_CEIL(m_basicFeature->m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
    uint32_t blockHeight  = MOS_ALIGN_CEIL(m_basicFeature->m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32;
    uint32_t streamInSize = blockHeight * blockWidth * CODECHAL_CACHELINE_SIZE;
    MOS_ZeroMemory(streamIn, streamInSize);

    // If segment map isn't provided then we unlock surface and exit function here.
    // Reason why check isn't done before function call is to take advantage of the fact that
    // we need the surface locked here if seg map is provided and we want it 0'd either way.
    // This saves us from doing 2 locks on this buffer per frame
    if (!m_segmentMapProvided)
    {
        ENCODE_CHK_STATUS_RETURN(m_allocator->UnLock(vdencStreamInBuffer));
        return MOS_STATUS_SUCCESS;
    }

    // Everything that can fail while the surfaces are locked runs inside this lambda,
    // so the early-return macros leave the lambda only and the unlock code after it
    // is always executed.
    uint8_t *data = nullptr;

    auto fillStreamIn = [&]() -> MOS_STATUS {
        data = static_cast<uint8_t *>(m_allocator->LockResourceForRead(&m_mbSegmentMapSurface.OsResource));
        ENCODE_CHK_NULL_RETURN(data);

        bool tileEnabled = false;
        RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, IsEnabled, tileEnabled);

        if (tileEnabled)
        {
            uint32_t numTileColumns = (1 << vp9PicParams->log2_tile_columns);
            uint32_t numTileRows    = (1 << vp9PicParams->log2_tile_rows);
            uint32_t numTiles       = numTileColumns * numTileRows;
            uint32_t tileX          = 0;
            uint32_t tileY          = 0;

            uint32_t currTileStartX64Aligned = 0;
            uint32_t currTileStartY64Aligned = 0;  //Set tile Y coordinate 0
            uint32_t blocksRasterized        = 0;  //Count of rasterized blocks for this frame

            for (uint32_t tileIdx = 0; tileIdx < numTiles; tileIdx++)
            {
                tileX = tileIdx % numTileColumns;  //Current tile column position
                tileY = tileIdx / numTileColumns;  //Current tile row position

                currTileStartX64Aligned = ((tileX * basicFeature->m_picWidthInSb) >> vp9PicParams->log2_tile_columns) * CODEC_VP9_SUPER_BLOCK_WIDTH;
                currTileStartY64Aligned = ((tileY * basicFeature->m_picHeightInSb) >> vp9PicParams->log2_tile_rows) * CODEC_VP9_SUPER_BLOCK_HEIGHT;

                uint32_t tileWidth64Aligned = (((tileX == (numTileColumns - 1)) ? basicFeature->m_picWidthInSb : (((tileX + 1) * basicFeature->m_picWidthInSb) >> vp9PicParams->log2_tile_columns)) *
                                                  CODEC_VP9_SUPER_BLOCK_WIDTH) -
                                              currTileStartX64Aligned;

                uint32_t tileHeight64Aligned = (((tileY == (numTileRows - 1)) ? basicFeature->m_picHeightInSb : (((tileY + 1) * basicFeature->m_picHeightInSb) >> vp9PicParams->log2_tile_rows)) *
                                                   CODEC_VP9_SUPER_BLOCK_HEIGHT) -
                                               currTileStartY64Aligned;

                // Last tile col raw width and raw height - not necessarily 64 aligned,
                // use this length to duplicate values from segmap for empty padding blocks in last tiles
                uint32_t lastTileColWidth  = (tileX == (numTileColumns - 1)) ? (basicFeature->m_frameWidth - currTileStartX64Aligned) : tileWidth64Aligned;
                uint32_t lastTileRowHeight = (tileY == (numTileRows - 1)) ? (basicFeature->m_frameHeight - currTileStartY64Aligned) : tileHeight64Aligned;

                uint32_t tileWidth  = (tileX == (numTileColumns - 1)) ? lastTileColWidth : tileWidth64Aligned;
                uint32_t tileHeight = (tileY == (numTileRows - 1)) ? lastTileRowHeight : tileHeight64Aligned;

                // Recreate the mapbuffer and remap it if, for this frame, tile height and width have changed from previous tile
                // which was processed from this frame or previous,
                // or if map buffer is created for previous frame and tile map has changed from previous frame (numtilerows and cols)
                EncodeTileData tileData = {};
                RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetTileByIndex, tileData, tileIdx);

                if (!m_mapBuffer ||
                    tileWidth != m_segStreamInWidth ||
                    tileHeight != m_segStreamInHeight ||
                    numTileColumns != tileData.numOfTileColumnsInFrame ||
                    numTiles != tileData.numOfTilesInFrame)
                {
                    ENCODE_CHK_STATUS_RETURN(InitZigZagToRasterLUTPerTile(
                        tileWidth,
                        tileHeight,
                        currTileStartX64Aligned,
                        currTileStartY64Aligned,
                        blocksRasterized));
                }
                // NOTE(review): tileData is a loop-local copy, so these two stores do not
                // persist past this iteration; kept as in the original pending confirmation.
                tileData.numOfTileColumnsInFrame = numTileColumns;
                tileData.numOfTilesInFrame       = numTiles;
            }
        }

        // The lookup table is only built in the tiled path above; refuse to index it when absent
        ENCODE_CHK_NULL_RETURN(m_mapBuffer);

        uint32_t dwPitch = m_mbSegmentMapSurface.dwPitch;
        if (osInterface->pfnGetResType(&m_mbSegmentMapSurface.OsResource) == MOS_GFXRES_BUFFER)
        {
            // Application can send 1D or 2D buffer, based on that change the pitch to correctly access the map buffer.
            // Driver reads the seg ids from the buffer for each 16x16 block. Reads 4 values for each 32x32 block
            dwPitch = MOS_ALIGN_CEIL(basicFeature->m_frameWidth, CODECHAL_MACROBLOCK_WIDTH) / CODECHAL_MACROBLOCK_WIDTH;
        }

        auto tuSettings = static_cast<Vp9VdencFeatureSettings *>(m_constSettings);
        ENCODE_CHK_NULL_RETURN(tuSettings);

        // TargetUsage indexes the per-TU setting tables below, so it must be valid
        if (false == TargetUsage::isValid(vp9SeqParams->TargetUsage))
        {
            return MOS_STATUS_INVALID_PARAMETER;
        }

        // Set seg ID's of streamin states
        for (uint32_t i = 0; i < blockHeight * blockWidth; ++i)
        {
            uint32_t addrOffset = CalculateBufferOffset(
                m_mapBuffer[i],
                basicFeature->m_frameWidth,
                vp9PicParams->PicFlags.fields.seg_id_block_size,
                dwPitch);

            uint32_t segId = *(data + addrOffset);

            streamIn[i].DW7.SegidEnable               = 1;
            streamIn[i].DW7.Segid32X32016X1603Vp9Only = segId | (segId << 4) | (segId << 8) | (segId << 12);

            // TU functions copied from there
            streamIn[i].DW0.Maxtusize = 3;

            streamIn[i].DW0.Maxcusize = 3;
            // For InterFrames we change the CUsize to 32x32 if we have sub 32 blocks with different segids in superblock
            if ((i % 4) == 3 && basicFeature->m_pictureCodingType == P_TYPE)
            {
                if (!(streamIn[i - 3].DW7.Segid32X32016X1603Vp9Only == streamIn[i - 2].DW7.Segid32X32016X1603Vp9Only &&
                        streamIn[i - 2].DW7.Segid32X32016X1603Vp9Only == streamIn[i - 1].DW7.Segid32X32016X1603Vp9Only &&
                        streamIn[i - 1].DW7.Segid32X32016X1603Vp9Only == streamIn[i].DW7.Segid32X32016X1603Vp9Only))
                {
                    streamIn[i - 3].DW0.Maxcusize =
                        streamIn[i - 2].DW0.Maxcusize =
                            streamIn[i - 1].DW0.Maxcusize =
                                streamIn[i].DW0.Maxcusize = 2;
                }
            }

            streamIn[i].DW0.Numimepredictors         = tuSettings->NumImePredictors[vp9SeqParams->TargetUsage];
            streamIn[i].DW6.Nummergecandidatecu8X8   = tuSettings->NumMergeCandidateCu8x8[vp9SeqParams->TargetUsage];
            streamIn[i].DW6.Nummergecandidatecu16X16 = tuSettings->NumMergeCandidateCu16x16[vp9SeqParams->TargetUsage];
            streamIn[i].DW6.Nummergecandidatecu32X32 = tuSettings->NumMergeCandidateCu32x32[vp9SeqParams->TargetUsage];
            streamIn[i].DW6.Nummergecandidatecu64X64 = tuSettings->NumMergeCandidateCu64x64[vp9SeqParams->TargetUsage];
        }

        return MOS_STATUS_SUCCESS;
    };

    const MOS_STATUS fillStatus = fillStreamIn();

    // Release whatever was locked, whether or not filling succeeded. The segment map
    // is only unlocked when its lock was actually obtained.
    MOS_STATUS unlockMapStatus = MOS_STATUS_SUCCESS;
    if (data != nullptr)
    {
        unlockMapStatus = m_allocator->UnLock(&m_mbSegmentMapSurface.OsResource);
    }
    const MOS_STATUS unlockStreamInStatus = m_allocator->UnLock(vdencStreamInBuffer);

    // Report the first failure: the fill error takes precedence over unlock errors
    ENCODE_CHK_STATUS_RETURN(fillStatus);
    ENCODE_CHK_STATUS_RETURN(unlockMapStatus);
    ENCODE_CHK_STATUS_RETURN(unlockStreamInStatus);

    return MOS_STATUS_SUCCESS;
}

//!
//! \brief    Build the zig-zag to raster lookup table entries for one tile
//! \details  m_mapBuffer holds one uint32_t per 32x32 block of the 64-aligned frame.
//!           For the given tile, the frame raster index of every 32x32 block is
//!           computed and written into m_mapBuffer starting at blocksRasterized,
//!           ordered so that each group of four consecutive entries covers one
//!           64x64 superblock (two blocks of the upper row, then two of the lower).
//! \param    [in] tileWidth
//!           Tile width in pixels (not necessarily 64 aligned)
//! \param    [in] tileHeight
//!           Tile height in pixels (not necessarily 64 aligned)
//! \param    [in] currTileStartXInFrame
//!           Horizontal start of the tile in the frame, in pixels
//! \param    [in] currTileStartYInFrame
//!           Vertical start of the tile in the frame, in pixels
//! \param    [in,out] blocksRasterized
//!           Offset into m_mapBuffer where this tile starts; advanced by the number
//!           of 32x32 blocks written for this tile
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS Vp9Segmentation::InitZigZagToRasterLUTPerTile(
    uint32_t  tileWidth,
    uint32_t  tileHeight,
    uint32_t  currTileStartXInFrame,
    uint32_t  currTileStartYInFrame,
    uint32_t &blocksRasterized)
{
    ENCODE_FUNC_CALL();

    auto basicFeature = static_cast<Vp9BasicFeature *>(m_basicFeature);
    ENCODE_CHK_NULL_RETURN(basicFeature);

    // Allocate space for zig-zag to raster LUT used for vdenc streamin (1 int32_t for every 32x32 block (pic 64 aligned)).
    // The (re)allocation only happens for the tile that starts at the frame origin, i.e. the first tile of a frame;
    // the remaining tiles of the frame write into the same buffer.
    // The buffer is kept across frames and released by FreeResources().
    if (currTileStartXInFrame == 0 && currTileStartYInFrame == 0)
    {
        // Free previous if it exists - it may exist if this isn't first seg streamin frame, but it's a new tile with different res
        if (m_mapBuffer)
        {
            MOS_FreeMemory(m_mapBuffer);
        }
        // Allocate one integer space for each 32*32 block in the whole frame to hold the segmentation index
        m_mapBuffer = (uint32_t *)MOS_AllocAndZeroMemory(
            (MOS_ALIGN_CEIL(basicFeature->m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32) *
            (MOS_ALIGN_CEIL(basicFeature->m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32) *
            sizeof(int32_t));  // Frame width and height are rounded up to 64 before counting 32x32 blocks
    }
    // Covers both a failed allocation above and a non-origin tile arriving before any allocation
    ENCODE_CHK_NULL_RETURN(m_mapBuffer);

    // Tile dimensions in 32x32 blocks after rounding the tile up to whole 64x64 superblocks
    uint32_t  align64Width32         = MOS_ALIGN_CEIL(tileWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
    uint32_t  align64Height32        = MOS_ALIGN_CEIL(tileHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32;
    // Scratch table holding the tile's blocks in plain raster order
    uint32_t *mapBufferZigZagPerTile = (uint32_t *)MOS_AllocAndZeroMemory(align64Width32 * align64Height32 * sizeof(uint32_t));
    ENCODE_CHK_NULL_RETURN(mapBufferZigZagPerTile);

    // NOTE(review): the cached dimensions are the frame dimensions, while the caller compares
    // them against per-tile dimensions - confirm this is intended.
    m_segStreamInWidth  = basicFeature->m_frameWidth;
    m_segStreamInHeight = basicFeature->m_frameHeight;

    // Step 1: record, in raster order within the tile, the frame raster index of every 32x32 block
    uint32_t count32 = 0;  // Number of 32 by 32 blocks that will be processed here
    for (uint32_t curr32YInTile = 0; curr32YInTile < align64Height32; curr32YInTile++)
    {
        for (uint32_t curr32XInTile = 0; curr32XInTile < align64Width32; curr32XInTile++)
        {
            mapBufferZigZagPerTile[count32++] = GetSegmentBlockIndexInFrame(
                basicFeature->m_frameWidth,
                curr32XInTile,
                curr32YInTile,
                currTileStartXInFrame,
                currTileStartYInFrame);
        }
    }

    // Step 2: interleave pairs of block rows so that every four consecutive m_mapBuffer
    // entries describe one superblock.
    //    mapBufferZigZagPerTile --->   m_mapBuffer
    //  | a b c d ...            ---> | a b W X c d Y Z ....
    //  | W X Y Z ...
    uint32_t num32blocks     = align64Width32 * align64Height32;
    uint32_t tileOffsetIndex = blocksRasterized;
    for (uint32_t i = 0, rasterCount = 0; i < num32blocks; i += (align64Width32 * 2))
    {
        // Upper block row of this superblock row: fills slots 0 and 1 of every group of four
        for (uint32_t j = i; j < i + (align64Width32 * 2); j += 4)
        {
            m_mapBuffer[j + tileOffsetIndex]     = mapBufferZigZagPerTile[rasterCount++];
            m_mapBuffer[j + tileOffsetIndex + 1] = mapBufferZigZagPerTile[rasterCount++];
        }
        // Lower block row of this superblock row: fills slots 2 and 3 of every group of four
        for (uint32_t j = i + 2; j < i + (align64Width32 * 2); j += 4)
        {
            m_mapBuffer[j + tileOffsetIndex]     = mapBufferZigZagPerTile[rasterCount++];
            m_mapBuffer[j + tileOffsetIndex + 1] = mapBufferZigZagPerTile[rasterCount++];
        }
    }
    // Free per tile map buffer as it has been rasterized and copied into the mapbuffer
    if (mapBufferZigZagPerTile)
    {
        MOS_FreeMemory(mapBufferZigZagPerTile);
    }

    // Zig-zag pattern filled to SB aligned (CEIL), if unaligned then we base seg ID address on previous row/column (data replication)
    uint32_t width32 = CODECHAL_GET_WIDTH_IN_BLOCKS(tileWidth, 32);
    if (width32 != align64Width32)  // replicate last column
    {
        // In the last superblock of each superblock row, copy the left block's entry over the right one (both block rows)
        for (auto i = (align64Width32 * 2) - 1 - 2; i < num32blocks; i += (align64Width32 * 2))
        {
            m_mapBuffer[i + tileOffsetIndex]     = m_mapBuffer[i + tileOffsetIndex - 1];
            m_mapBuffer[i + tileOffsetIndex + 2] = m_mapBuffer[i + tileOffsetIndex + 1];
        }
    }

    uint32_t height32 = CODECHAL_GET_HEIGHT_IN_BLOCKS(tileHeight, 32);
    if (height32 != align64Height32)  // replicate last row
    {
        // In the last superblock row, copy the upper block row's entries over the lower block row's
        for (auto i = num32blocks - (align64Width32 * 2) + 2; i < num32blocks; i += 4)
        {
            m_mapBuffer[i + tileOffsetIndex]     = m_mapBuffer[i + tileOffsetIndex - 2];
            m_mapBuffer[i + tileOffsetIndex + 1] = m_mapBuffer[i + tileOffsetIndex + 1 - 2];
        }
    }
    // Index offset to be added to the buffer for the next tile depending on how many
    // blocks were rasterized already in this tile
    blocksRasterized += count32;

    return MOS_STATUS_SUCCESS;
}

uint32_t Vp9Segmentation::GetSegmentBlockIndexInFrame(
    uint32_t frameWidth,
    uint32_t curr32XInTile,
    uint32_t curr32YInTile,
    uint32_t currTileStartX64aligned,
    uint32_t currTileStartY64aligned)
{
    ENCODE_FUNC_CALL();

    // Row stride of the frame in 32x32 blocks, with the width rounded up to a superblock
    const uint32_t blocksPerFrameRow = MOS_ALIGN_CEIL(frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;

    // Translate the tile-relative block position into a frame-relative one
    const uint32_t blockColumnInFrame = curr32XInTile + currTileStartX64aligned / 32;
    const uint32_t blockRowInFrame    = curr32YInTile + currTileStartY64aligned / 32;

    // Raster index of the block within the frame
    return blockRowInFrame * blocksPerFrameRow + blockColumnInFrame;
}

uint32_t Vp9Segmentation::CalculateBufferOffset(uint32_t idx, uint32_t width, uint32_t blockSize, uint32_t bufferPitch)
{
    // idx is a raster index over 32x32 blocks of the superblock-aligned frame width
    const uint32_t blocksPerRow = MOS_ALIGN_CEIL(width, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;

    uint32_t column = idx % blocksPerRow;
    uint32_t row    = idx / blocksPerRow;

    // Rescale the 32x32 block coordinates to the granularity of the segment map.
    // blockSize 1 (32x32) and any other value leave the coordinates untouched.
    if (blockSize == 0)  // 16x16
    {
        column *= 2;
        row *= 2;
    }
    else if (blockSize == 2)  // 64x64
    {
        column /= 2;
        row /= 2;
    }
    else if (blockSize == 3)  // 8x8
    {
        column *= 4;
        row *= 4;
    }

    return row * bufferPitch + column;
}

//!
//! \brief    Select the segment whose state is programmed next
//! \param    [in] segmentId
//!           Segment index; must be below CODEC_VP9_MAX_SEGMENTS because it is
//!           used to index the per-segment data array
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, MOS_STATUS_INVALID_PARAMETER when the
//!           index is out of range (the stored id is left unchanged in that case)
//!
MOS_STATUS Vp9Segmentation::SetSegmentId(uint8_t segmentId)
{
    if (segmentId >= CODEC_VP9_MAX_SEGMENTS)
    {
        return MOS_STATUS_INVALID_PARAMETER;
    }

    m_segmentId = segmentId;

    return MOS_STATUS_SUCCESS;
}

// Fills the HCP_VP9_SEGMENT_STATE parameters for the segment selected by m_segmentId
// from the cached segment parameters. Returns an error instead of dereferencing a
// missing segment-parameter pointer or indexing past the per-segment array.
MHW_SETPAR_DECL_SRC(HCP_VP9_SEGMENT_STATE, Vp9Segmentation)
{
    ENCODE_FUNC_CALL();

    ENCODE_CHK_NULL_RETURN(m_basicFeature);
    ENCODE_CHK_NULL_RETURN(m_vp9SegmentParams);
    if (m_segmentId >= CODEC_VP9_MAX_SEGMENTS)
    {
        return MOS_STATUS_INVALID_PARAMETER;
    }

    // Read-only view of the selected segment; no copy of the structure is needed
    const auto &vp9SegData = m_vp9SegmentParams->SegData[m_segmentId];

    params.segmentId = m_segmentId;

    params.segmentSkipped          = vp9SegData.SegmentFlags.fields.SegmentSkipped;
    params.segmentReference        = vp9SegData.SegmentFlags.fields.SegmentReference;
    params.segmentReferenceEnabled = vp9SegData.SegmentFlags.fields.SegmentReferenceEnabled;

    // The deltas are passed through Convert2SignMagnitude with field widths 7 and 9
    params.segmentLfLevelDeltaEncodeModeOnly = m_basicFeature->Convert2SignMagnitude(vp9SegData.SegmentLFLevelDelta, 7);
    params.segmentQindexDeltaEncodeModeOnly  = m_basicFeature->Convert2SignMagnitude(vp9SegData.SegmentQIndexDelta, 9);

    return MOS_STATUS_SUCCESS;
}

// Fills the segmentation-related VDENC_CMD2 parameters: the stream-in enable and the
// per-segment QP values. With segmentation enabled each segment gets the luma AC
// q-index plus its own q-index delta; otherwise every segment gets the luma AC
// q-index plus the luma DC q-index delta.
MHW_SETPAR_DECL_SRC(VDENC_CMD2, Vp9Segmentation)
{
    ENCODE_FUNC_CALL();

    MHW_MI_CHK_NULL(m_basicFeature);
    MHW_MI_CHK_NULL(m_basicFeature->m_vp9PicParams);
    auto vp9PicParams = m_basicFeature->m_vp9PicParams;
    MHW_MI_CHK_NULL(m_vp9SegmentParams);

    const bool segmentationEnabled = vp9PicParams->PicFlags.fields.segmentation_enabled;

    const uint8_t lumaAcqIndex      = vp9PicParams->LumaACQIndex;
    const uint8_t lumaDcqIndexDelta = vp9PicParams->LumaDCQIndexDelta;

    // Per-segment data is read straight from the cached segment parameters
    const CODEC_VP9_ENCODE_SEG_PARAMS *segData = m_vp9SegmentParams->SegData;

    params.vdencStreamIn = m_segmentMapProvided || m_basicFeature->m_16xMeEnabled;

    // qpForSegs[0..3] feed DW24 (QpForSeg0..3), qpForSegs[4..7] feed DW25 (QpForSeg4..7)
    for (uint32_t seg = 0; seg < CODEC_VP9_MAX_SEGMENTS; ++seg)
    {
        params.qpForSegs[seg] = segmentationEnabled
                                    ? (lumaAcqIndex + segData[seg].SegmentQIndexDelta)
                                    : (lumaAcqIndex + lumaDcqIndexDelta);
    }

    return MOS_STATUS_SUCCESS;
}

}  // namespace encode
